<?php

require_once('./discuz_publish.php');
set_time_limit(0);

/**
 * Fetch a news listing page and extract the article links it contains.
 *
 * The raw page is also written to the "log" file next to this script
 * (overwriting it) as a debugging aid.
 *
 * @param string $url      Location of the listing page (anything file_get_contents() accepts).
 * @param string $base_url Prefix prepended to every extracted href.
 *
 * @return array List of array('href' => string, 'title' => string); empty when the
 *               page cannot be fetched or holds no recognisable entries.
 */
function get_url($url, $base_url='')
{
	// Always hand back an array so callers can foreach over the result safely.
	$result = array();

	$content = file_get_contents($url);
	if($content === false)
	{
		return $result;
	}
	file_put_contents( dirname(__FILE__ ) . '/log', $content);

	// Each entry of interest is a single-line <li> starting with the "hots" span.
	if(!preg_match_all('/<li><span class="hots">.*?<\/a><\/li>/', $content, $matches))
	{
		return $result;
	}

	foreach($matches[0] as $item)
	{
		// href is single-quoted and title is double-quoted in the listing markup;
		// entries missing either attribute are skipped instead of yielding garbage.
		if(!preg_match('/href=\'([^\']*)\'.*?title="([^"]*)"/i', $item, $parts))
		{
			continue;
		}
		$result[] = array('href'=>$base_url . $parts[1], 'title'=>$parts[2]);
	}

	return $result;
}

// Site root; listing paths and the ?url= override are resolved against it.
$base_url = 'http://www.jhyuwenfudao.com/';

// Listing pages selectable with ?i=N.
// Entry 6 maps to NewsList-39 because its dedicated URL was disabled and the
// script fell through to that default.
$list_pages = array(
	1  => 'NewsList1-39_2.aspx',
	2  => 'NewsList1-39_3.aspx',
	3  => 'NewsList-38.aspx',
	4  => 'NewsList1-38_2.aspx',
	5  => 'NewsList1-38_3.aspx',
	6  => 'NewsList-39.aspx',
	7  => 'NewsList-37.aspx',
	8  => 'NewsList1-37_2.aspx',
	9  => 'NewsList1-37_3.aspx',
	10 => 'NewsList1-37_4.aspx',
	11 => 'NewsList1-37_5.aspx',
	12 => 'NewsList1-37_6.aspx',
);

// Missing or non-numeric ?i is treated as 0, which selects the ?ii branch below.
$page = (isset($_GET['i']) && is_numeric($_GET['i'])) ? (int) $_GET['i'] : 0;

if(isset($list_pages[$page]))
{
	$urls = array($base_url . $list_pages[$page]);
}
else
{
	// Category 36: ?ii=N picks page N; absent, zero or invalid means the first page.
	// Casting to int keeps arbitrary request text out of the fetched URL.
	$ii = (isset($_GET['ii']) && is_numeric($_GET['ii'])) ? (int) $_GET['ii'] : 0;
	if($ii <= 0)
	{
		$urls = array($base_url . 'NewsList-36.aspx');
	}
	else
	{
		$urls = array($base_url . "NewsList1-36_$ii.aspx");
	}
}

// ?url=<path> overrides everything above with an explicit page under the site root.
if(!empty($_GET['url']) && is_string($_GET['url']))
{
	$urls = array($base_url . $_GET['url']);
	var_dump($urls);
}

$i = 0;
foreach($urls as $url)
{
	$result = get_url($url, $base_url);
	if(!is_array($result))
	{
		// Nothing could be extracted from this listing page.
		continue;
	}

	foreach($result as $v)
	{
		$i++;
		get_content($v['href'], $v['title']);

		// Hard cap on the number of articles handled in one run.
		if($i == 500) exit(0);
	}
}

exit(0);

/**
 * Collect the first capture group of every match of a regex, provided a marker
 * string is present in the content.
 *
 * @param string $content  Text to search.
 * @param string $pattern  Literal marker looked up case-insensitively.
 * @param string $pattern2 Regex body (without delimiters; "/" is used as delimiter)
 *                         whose first capture group is collected.
 *
 * @return array|false Captured strings of group 1 (possibly empty), or false when
 *                     the marker does not occur in $content.
 */
function get_next_page($content, $pattern, $pattern2)
{
	// Without the marker there is nothing to extract.
	if(stripos($content, $pattern) === false)
	{
		return false;
	}

	preg_match_all("/{$pattern2}/", $content, $found);

	return $found[1];
}

/**
 * Cut the fragment of a page that lies between two markers and strip the
 * leading "<h1> ... </div>" header from it.
 *
 * The returned fragment starts right after $pattern and ends with $pattern2
 * (the closing marker itself is kept). Both markers are located
 * case-insensitively.
 *
 * @param string $content  Page source.
 * @param string $pattern  Opening marker; everything up to and including it is dropped.
 * @param string $pattern2 Closing marker; everything after it is dropped.
 *
 * @return string The extracted fragment, or '' when $content is not a string or
 *                either marker is missing.
 */
function parse_content($content, $pattern, $pattern2)
{
	// A failed fetch hands us false rather than a string.
	if(!is_string($content))
	{
		return '';
	}

	$start = stripos($content, $pattern);
	if($start === false)
	{
		return '';
	}
	$content = substr($content, $start + strlen($pattern));

	$end = stripos($content, $pattern2);
	if($end === false)
	{
		return '';
	}
	$content = substr($content, 0, $end + strlen($pattern2));

	// The "s" modifier lets "." cross line breaks, so headers are stripped even
	// when they contain "~" (which the former [^~] character class could not span).
	$stripped = preg_replace('/<h1>.*?<\/div>/s', '', $content);

	// preg_replace() yields null on a PCRE error; keep the unstripped text then.
	return ($stripped === null) ? $content : $stripped;
}

/**
 * Download one article (including its follow-up pages), reduce it to the news
 * body and hand it to discuz_publish().
 *
 * The title and extracted body are written to the "log" file next to this
 * script (overwriting it) as a debugging aid. Articles whose body cannot be
 * extracted are logged but not published.
 *
 * @param string $href  Absolute URL of the article's first page.
 * @param string $title Article title, used as the post subject.
 * @param string $g_url Prefix prepended to the follow-up page links found in the article.
 * @param int    $fid   Third argument passed to discuz_publish() (presumably the
 *                      target forum id; verify against discuz_publish.php).
 *
 * @return void
 */
function get_content($href, $title, $g_url='http://www.jhyuwenfudao.com/', $fid=83)
{
	$log_file     = dirname(__FILE__ ) . '/log';
	$start_marker = '<div id="newsInfo">';
	$end_marker   = '<div id="behavior">';

	$page = file_get_contents($href);
	$content = ($page === false) ? '' : parse_content($page, $start_marker, $end_marker);
	file_put_contents($log_file, $title . '{br}'. $content . '{br}');

	// Fetch failed or the page lacks the expected markers: do not publish an empty post.
	if(!is_string($content) || $content === '')
	{
		return;
	}

	// Follow-up pages are linked from the "custompage" block inside the article.
	$next_href = get_next_page($content, '<div id="custompage" >','<a href="(.*?)" title=".*?">\[.*?\]<\/a>.*?<\/div>');
	if(is_array($next_href))
	{
		foreach($next_href as $v)
		{
			$tmp = file_get_contents($g_url . $v);
			if($tmp === false)
			{
				// Skip pages that cannot be fetched; keep what we already have.
				continue;
			}
			$content .= parse_content($tmp, $start_marker, $end_marker);
		}
	}

	// Drop the pager markup of every page. The "s" modifier lets "." span line
	// breaks, so the block is removed even when it contains "~".
	$cleaned = preg_replace('/<div id="custompage" >.*?<div id="behavior">/s', '', $content);
	if($cleaned !== null)
	{
		$content = $cleaned;
	}

	file_put_contents($log_file, $title . '{br}'. $content . '{br}');

	// Honour the caller-supplied $fid instead of a hard-coded value.
	discuz_publish(escape_s($title), escape_s($content), $fid);
}

/**
 * Prepare scraped text for publishing: backslash-escape quotes and flatten a
 * few HTML tags.
 *
 * Single and double quotes get a leading backslash, "<br />" and all opening
 * and closing div tags are removed, and every "<p>" becomes a CRLF line break.
 *
 * @param string $content Raw text or HTML fragment.
 *
 * @return string The transformed text.
 */
function escape_s($content)
{
	// Literal replacements, applied one after another in this order.
	$literal_rules = array(
		"'"      => "\'",
		'"'      => '\"',
		'<br />' => '',
	);
	$content = str_replace(array_keys($literal_rules), array_values($literal_rules), $content);

	// Case-insensitive tag rewrites, also applied strictly in order.
	$tag_rules = array(
		'/<div.*?>/i' => '',
		'/<\/div>/i'  => '',
		'/<p>/i'      => "\r\n",
	);
	foreach($tag_rules as $regex => $replacement)
	{
		$content = preg_replace($regex, $replacement, $content);
	}

	return $content;
}

?>